use "WID_income_data.dta", clear

** redistribution measures
* For each income group, four variables are built from the pre_, post_ and
* pdisp_ series of the matching percentile range:
*   red_<grp>            100 * (post - pre) / post
*   red_<grp>_disp       100 * (pdisp - pre) / pdisp
*   red_<grp>_abs        post - pre
*   red_<grp>_abs_disp   pdisp - pre
* The two lists below are parallel: the i-th group uses the i-th range.
*   t1, t10, t20, t30 : top 1, 10, 20 and 30 percent
*   m20, m40          : middle quintile and middle forty percent
*   b40, b20, b30     : bottom 40, 20 and 30 percent
local groups t1 t10 t20 t30 m20 m40 b40 b20 b30
local ranges p99p100 p90p100 p80p100 p70p100 p40p60 p30p70 p0p40 p0p20 p0p30

local ngroups : word count `groups'
forvalues i = 1/`ngroups' {
    local grp : word `i' of `groups'
    local rng : word `i' of `ranges'

    * relative change, expressed in percent of the post / pdisp value
    generate red_`grp'          = 100*((post_`rng'-pre_`rng')/post_`rng')
    generate red_`grp'_disp     = 100*((pdisp_`rng'-pre_`rng')/pdisp_`rng')

    * absolute change
    generate red_`grp'_abs      = post_`rng'-pre_`rng'
    generate red_`grp'_abs_disp = pdisp_`rng'-pre_`rng'
}

*** redistribution measures with public goods spending distributed as an equal lump-sum
* For each income group, two variables are built from the share_lspost_ and
* pre_ series of the matching percentile range:
*   red_<grp>ls      100 * (share_lspost - pre) / share_lspost
*   red_<grp>_absls  share_lspost - pre
* The two lists below are parallel: the j-th group uses the j-th range.
*   b20, b30 : bottom 20 and 30 percent
*   m20, m40 : middle 20 and 40 percent
*   t30, t20, t10, t1 : top 30, 20, 10 and 1 percent
local lsgroups b20 b30 m20 m40 t30 t20 t10 t1
local lsranges p0p20 p0p30 p40p60 p30p70 p70p100 p80p100 p90p100 p99p100

local nlsgroups : word count `lsgroups'
forvalues j = 1/`nlsgroups' {
    local grp : word `j' of `lsgroups'
    local rng : word `j' of `lsranges'

    generate red_`grp'ls     = 100*((share_lspost_`rng'-pre_`rng')/share_lspost_`rng')
    generate red_`grp'_absls = share_lspost_`rng'-pre_`rng'
}


* generating top and bottom inequality measures
* Every measure is a ratio of two pre_ series. The p40p60 series is the
* denominator of the three top measures and the numerator of the bottom one.
local middle pre_p40p60

generate top_inequality        = pre_p99p100/`middle'
generate top_inequality_t20    = pre_p80p100/`middle'
generate top_inequality_t10    = pre_p90p100/`middle'
generate bottom_inequality_b20 = `middle'/pre_p0p20


** merging w CPDS
* The country label is switched to "USA" for this merge only and restored
* right after it (presumably because the CPDS file uses "USA" - confirm).
replace country = "USA" if country == "United States"
merge 1:1 country year using "CPDS_1960-2018_Update_2020.dta"
drop _merge
replace country = "United States" if country == "USA"

* numeric country identifier
encode country, generate(ccode)

* merging w marriage rates, female labor force participation rate and
* gdp per capita: all three are matched 1:1 on country-year, and the _merge
* indicator is discarded after each merge
foreach extra in "marriage_rate.dta" "female_labor_force_participation.dta" "GDP_CAP.dta" {
    merge 1:1 country year using "`extra'"
    drop _merge
}

* country sample
* s flags the 17 countries retained for the analysis (1 = in sample, 0 = not);
* all other observations are dropped.
generate s = 0
foreach ctry in "Austria" "Belgium" "Denmark" "Finland" "France" "Germany" ///
                "Greece" "Ireland" "Italy" "Netherlands" "Norway" "Portugal" ///
                "Spain" "Sweden" "Switzerland" "United Kingdom" "United States" {
    replace s = 1 if country == "`ctry'"
}

keep if s == 1

* interpolating values for the control variables
* Within each country, every control is linearly interpolated over year and,
* through the epolate option, also extrapolated. The result is stored in a
* new variable named i<control>; the original variable is left untouched.
foreach ctrl in openc female_labor marriage_rate ud {
    bysort country (year): ipolate `ctrl' year, generate(i`ctrl') epolate
}

* ln
* natural logs of the interpolated openness and marriage-rate series
generate ln_openc    = ln(iopenc)
generate ln_marriage = ln(imarriage_rate)

** kaopen
* imputing values for CH before 1996: Swiss observations with year < 1996 are
* set to 1, every other observation keeps its kaopen value
generate ikaopen = cond(country=="Switzerland" & year<1996, 1, kaopen)

* final recodes:

* trend var (equals 1 in 1980)
generate trend = year - 1979

* rebuild the numeric country identifier and declare the panel structure
* NOTE(review): drop removes only the variable; if a value label named ccode
* is still defined in memory, encode reuses and extends it instead of
* numbering the remaining countries from 1 - confirm this is intended
drop ccode
encode country, generate(ccode)
xtset ccode year

* generating cumulative left partisanship
* running sum of gov_left2 within country, in year order; it is computed
* before the year filter below, so earlier years contribute to the total
bysort country (year): generate cum_left = sum(gov_left2)

* rescaling: both variables are divided by 100
foreach scaled of varlist cum_left iopenc {
    replace `scaled' = `scaled'/100
}

* public goods spending as proportion of total national income
* (itotal_benefits and total_ni are not created in this file; they are
* expected to come from the loaded or merged data)
generate pb_ni = itotal_benefits/total_ni

* keeping only data from 1980 onwards (year > 1979)
keep if year>1979

save "data_analysis_final", replace
